Packages Used

In [1]:
import plotly
from plotly.graph_objs import Scatter, Layout
import pandas as pd
import numpy as np
import locale
locale.setlocale(locale.LC_ALL, '')
from plotly.graph_objs import *
import matplotlib.pyplot as plt
%matplotlib inline
# Matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and newer
# releases no longer accept the old names, so use the new name when this
# installation provides it and fall back to the original name otherwise.
if 'seaborn-v0_8-whitegrid' in plt.style.available:
    plt.style.use('seaborn-v0_8-whitegrid')
else:
    plt.style.use('seaborn-whitegrid')

Set plotly to offline; produce all plots in the notebook

In [2]:
# Switch plotly to offline mode so that the figures below are produced inside the notebook.
plotly.offline.init_notebook_mode(connected=True)

Import the unprocessed data

To undo the data processing with the same values.

In [3]:
# Unprocessed tornado table; the next cell derives the standardization statistics from it
raw_tornado_csv = "/home/jeremydiaz/tornadoesr/data/raw/tor_data_with_derived.csv"
unproc_tor_df = pd.read_csv(raw_tornado_csv)

Storing those values.

In [4]:
# Mean and standard deviation of the starting latitude
raw_begin_lat = unproc_tor_df['BEGIN_LAT']
mean_lat = np.mean(raw_begin_lat)
stand_dev_lat = np.std(raw_begin_lat)

# Mean and standard deviation of the starting longitude
raw_begin_lon = unproc_tor_df['BEGIN_LON']
mean_lon = np.mean(raw_begin_lon)
stand_dev_lon = np.std(raw_begin_lon)

# Mean and standard deviation of the natural log of (property damage + 1);
# the transformed column is computed once and reused for both statistics
raw_log_damage = np.log(unproc_tor_df['DAMAGE_PROPERTY'] + 1)
mean_log_dam = np.mean(raw_log_damage)
stand_dev_log_dam = np.std(raw_log_damage)

Import the test set

To get location information.

In [5]:
# Test-set rows together with the model's expected values
test_set_csv = "/home/jeremydiaz/tornadoesr/Complete_Workflow/test_with_expectated_values.csv"
test_set = pd.read_csv(test_set_csv)

Undo the processing.

In [6]:
# Back-transform the damages to dollars. DAMAGE_PROPERTY and EXPECTED_VALUE are
# treated here as log10(dollars + 1).
# NOTE(review): the statistics cell computes mean_log_dam / stand_dev_log_dam from a
# natural-log transform, yet they are not used in the visible part of this notebook;
# confirm that this file really stores log10(x + 1) and not a standardized natural log.
test_set['natural_scale_true'] = (10**test_set['DAMAGE_PROPERTY']) - 1

test_set['natural_scale_pred'] = (10**test_set['EXPECTED_VALUE']) - 1

test_set['natural_scale_resid'] = test_set['natural_scale_pred'] - test_set['natural_scale_true']

# Undo the standardization of the coordinates. The standardized values are stashed
# in a side column the first time this cell runs and are always used as the input,
# so running the cell again does not un-standardize the coordinates a second time.
for coord, coord_mean, coord_std in (('BEGIN_LAT', mean_lat, stand_dev_lat),
                                     ('BEGIN_LON', mean_lon, stand_dev_lon)):

    stash = coord + '_STANDARDIZED'

    if stash not in test_set.columns:
        test_set[stash] = test_set[coord]

    test_set[coord] = (test_set[stash] * coord_std) + coord_mean

This will tell whether each prediction was an over- or underestimate, get the absolute difference between predicted and true values (in log-10 scale), then apply the corresponding sign for that difference (negative for underestimates and positive for overestimates).

In [7]:
natural_resid = test_set['natural_scale_resid']

# Direction of the error: +1 for an overestimate (residual > 0), -1 otherwise
test_set['natural_resid_sign'] = np.where(natural_resid > 0, 1, -1)

# Size of the error in log-10 scale. The + 1 offset keeps the magnitude at or above
# zero: with a plain log10(|residual|), a residual smaller than one dollar has a
# negative log, which flips the direction applied below, and a residual of exactly
# zero yields an infinite value.
test_set['log_10_abs_resid'] = np.log10(natural_resid.abs() + 1)

# Signed magnitude: negative for underestimates, positive for overestimates
test_set['log_10_resid_direction'] = test_set['natural_resid_sign'] * test_set['log_10_abs_resid']

Getting a clean label for each point.

In [8]:
# locale.format() was deprecated and then removed in Python 3.12;
# locale.format_string() takes the same arguments for a single "%d" specifier.
# Iterating over the columns (instead of looking rows up with [i]) also works when
# the frame's index is not 0..n-1.

# Residual as a whole number with locale-specific thousands separators
labels = [locale.format_string("%d", resid, grouping = True)
          for resid in test_set['natural_scale_resid']]

# Hover text of each point: expected damage, true damage, then the residual
labels_better = ['Expected Damage: ' + "$" + locale.format_string("%d", pred, grouping = True)
                 + '<br>True Damage: ' + "$" + locale.format_string("%d", true, grouping = True)
                 + '<br><br>Residual: ' + resid_label
                 for pred, true, resid_label in zip(test_set['natural_scale_pred'],
                                                    test_set['natural_scale_true'],
                                                    labels)]

Import U.S. cities data

In [9]:
# Only cities with more inhabitants than this are kept
MIN_CITY_POPULATION = 100000

cities_url = 'https://raw.githubusercontent.com/plotly/datasets/master/2014_us_cities.csv'

cities_df = pd.read_csv(cities_url)

# .copy() makes the filtered table an independent frame, so assigning to one of its
# columns in a later cell does not raise pandas' SettingWithCopyWarning.
cities_df = cities_df[cities_df['pop'] > MIN_CITY_POPULATION].copy()

Get some clean, useful labels for each city point

In [10]:
# Replace the population by a string with thousands separators (1234567 -> "1,234,567").
# The formatted strings overwrite the numbers, and "{:,}".format raises on a string,
# so the conversion is only done while the column is still numeric; this keeps the
# cell safe to run more than once. assign() builds a new frame instead of writing
# into a filtered slice.
if pd.api.types.is_numeric_dtype(cities_df['pop']):
    cities_df = cities_df.assign(pop = cities_df['pop'].map("{:,}".format))

# Hover text of each city marker
city_labels = "Name: " + cities_df['name'] + "<br>Population: " + cities_df['pop']

The Test Set Residual Map

This will produce the map, showing where and by how much the expected value model was wrong. Dark blues indicate strong overestimates, while dark reds indicate strong underestimates. Lighter colors indicate where the model performed relatively well.

In [11]:
# Colour scale running from dark red (lowest value) to dark blue (highest value)
residual_colorscale = [[0.0, 'rgb(165,0,38)'],
                       [0.1111111111111111, 'rgb(215,48,39)'],
                       [0.2222222222222222, 'rgb(244,109,67)'],
                       [0.3333333333333333, 'rgb(253,174,97)'],
                       [0.4444444444444444, 'rgb(254,224,144)'],
                       [0.5555555555555556, 'rgb(224,243,248)'],
                       [0.6666666666666666, 'rgb(171,217,233)'],
                       [0.7777777777777778, 'rgb(116,173,209)'],
                       [0.8888888888888888, 'rgb(69,117,180)'],
                       [1.0, 'rgb(49,54,149)']]

# Colour limits symmetric about zero, taken from the largest finite |value|.
# Using (min, -min) as the limits only works while the strongest underestimate is
# larger than the strongest overestimate; otherwise overestimates are clipped, and
# if the minimum is positive or infinite the limits are unusable.
resid_direction = test_set["log_10_resid_direction"]
color_limit = resid_direction[np.isfinite(resid_direction)].abs().max()

# Layer 1: one marker per test-set tornado, coloured by the signed log-10 residual
trace1 = {"lon": test_set["BEGIN_LON"],
          "lat":  test_set["BEGIN_LAT"],
          "name": "Residual",
          "text": labels_better,
          "marker": {"color": resid_direction,
                     "colorscale": residual_colorscale,
                     "size": 5.5,
                     "cmin": -color_limit,
                     "cmax": color_limit,
                     "colorbar": dict(title = 'Direction and Magnitude<br>of Residual')},
          "type": "scattergeo",
          "visible": True}

# Layer 2: the cities, drawn as open black stars
trace2 = {"lon": cities_df["lon"],
          "lat":  cities_df["lat"],
          "name": "City",
          "text": city_labels,
          "marker": {"size": 5,
                     "symbol": "star-open",
                     "color": "black"},
          "type": "scattergeo"}

# A plain list of traces; the graph_objs.Data wrapper is deprecated
data = [trace1, trace2]

# White land on a light grey background, with a graticule every 5 degrees
layout = dict(geo = dict(scope = 'north america',
                         showland = True,
                         landcolor = "rgb(255, 255, 255)",
                         subunitcolor = "rgb(0, 0, 0)",
                         countrycolor = "rgb(0, 0, 0)",
                         showlakes = True,
                         showocean = True,
                         lakecolor = "rgb(247, 247, 247)",
                         oceancolor = "rgb(247, 247, 247)",
                         showsubunits = True,
                         showcountries = True,
                         resolution = 50,
                         lonaxis = dict(showgrid = True,
                                        gridwidth = 0.05,
                                        range= [-125.0, -70.0],
                                        dtick = 5),
                         lataxis = dict(showgrid = True,
                                        gridwidth = 0.05,
                                        range= [23.0, 50.0],
                                        dtick = 5)),
              title = 'Map of Test Set Residuals',
              showlegend = False)

fig1 = {'data':data,
       'layout':layout}

plotly.offline.iplot(fig1)

How does the expected value model fail?

Natural scale exploration

In [12]:
# Dollar-scale residual against the true damage
fig, ax = plt.subplots()
ax.scatter(test_set['natural_scale_true'],
           test_set['natural_scale_resid'],
           facecolor = 'k', s = 60)

# Red reference line y = -x, drawn from the origin out to 1e9
ax.plot([0, 1e9], [0, -1e9], 'r', lw = 1)

ax.set_ylabel("Unprocessed Residual", size = 14)
ax.set_xlabel("True Damage", size = 14)
ax.set_title("Unprocessed Residuals versus True Damage\n", size = 16);

It appears that when the model underestimates by a large amount, it is exclusively because the tornado was very damaging; this may not be particularly informative, as a large true value is required for a large underestimate.

Notably, however, the model doesn't tend to overpredict by nearly as large a margin as it underpredicts. This is likely a result of the log transformation required for model fitting.

In [13]:
# Dollar-scale residual against the predicted damage
fig, ax = plt.subplots()
ax.scatter(test_set['natural_scale_pred'],
           test_set['natural_scale_resid'],
           facecolor = 'k', s = 60)

# Red reference line y = -x, drawn from the origin out to 1e9
ax.plot([0, 1e9], [0, -1e9], 'r', lw = 1)

ax.set_ylabel("Unprocessed Residual", size = 14)
ax.set_xlabel("Prediction", size = 14)
ax.set_title("Unprocessed Residuals versus Prediction\n", size = 16);

However the model does not exclusively underestimate when it predicts low values, as indicated by points lying above this red line.

Processed scale exploration

In [14]:
# Residual in log-10 scale. The + 1 offset follows the log10(x + 1) convention used
# for the damages elsewhere in this notebook; without it a value of zero dollars
# becomes log10(0) = -inf and the residual of that row is no longer finite.
test_set['residual_of_log_xformed'] = np.log10(test_set['natural_scale_pred'] + 1) - np.log10(test_set['natural_scale_true'] + 1)
In [15]:
# Log-scale residual against the true damage in processed (model) scale
fig, ax = plt.subplots()
ax.scatter(test_set['DAMAGE_PROPERTY'],
           test_set['residual_of_log_xformed'],
           facecolor = 'k', s = 60)

ax.set_ylabel("Processed Residual", size = 14)
ax.set_xlabel("Processed True Damage", size = 14)
ax.set_title("Processed Residuals versus Processed True Damage\n", size = 16);

While the trend of underestimating at high values remains true in processed-scale data, it is much less severe. The trend here is actually centered on zero, with the model often overpredicting on low values.

This isn't pronounced on the previous plots because the axes are so compressed due to scale.

This reveals a problem with the model; its necessary reliance on log transformation devalues severe underestimates when fitting/optimizing.

In [16]:
# Log-scale residual against the prediction in processed (model) scale
fig, ax = plt.subplots()
ax.scatter(test_set['EXPECTED_VALUE'],
           test_set['residual_of_log_xformed'],
           facecolor = 'k', s = 60)

ax.set_ylabel("Processed Residual", size = 14)
ax.set_xlabel("Processed Prediction", size = 14)
ax.set_title("Processed Residuals versus Processed Prediction\n", size = 16);

Log transformation is further condemned here: in model-scale data, the model actually tends to overpredict more as it predicts higher values. Again, nondamaging events are an issue.

The problem displayed by these plots is that the model is optimized using log-transformed data (which is required for decent optimization), and this processed-scale optimization does not perform well with respect to the extremely damaging events. It optimizes with respect to order of magnitude, treating all magnitude differences equally, when in reality being wrong in the magnitude from 1 to 100 is much less important than being wrong in the magnitude from 10,000 to 1,000,000.

Import the expected values for the grid and cities

In [17]:
# Expected values over the map grid
grid_csv = "/home/jeremydiaz/tornadoesr/Complete_Workflow/grid_with_expectated_values.csv"
grid_exp_val_2018 = pd.read_csv(grid_csv)

# Expected values at the cities
cities_csv = "/home/jeremydiaz/tornadoesr/Complete_Workflow/cities_with_expectated_values.csv"
cities_exp_val_2018 = pd.read_csv(cities_csv)

Making the month variable easier to work with.

Undoing the processing.

In [18]:
# The same steps are applied to the grid table and to the cities table.
for frame in (grid_exp_val_2018, cities_exp_val_2018):

    # Damage in log-10 scale (with the + 1 offset), used to colour the map
    frame['log_10_dam'] = np.log10(frame['DAMAGE_PROPERTY'] + 1)

    # Undo the standardization of the coordinates. The standardized values are
    # stashed in a side column the first time this cell runs and are always used as
    # the input, so running the cell again does not un-standardize them twice.
    for coord, coord_mean, coord_std in (('BEGIN_LAT', mean_lat, stand_dev_lat),
                                         ('BEGIN_LON', mean_lon, stand_dev_lon)):

        stash = coord + '_STANDARDIZED'

        if stash not in frame.columns:
            frame[stash] = frame[coord]

        frame[coord] = (frame[stash] * coord_std) + coord_mean

Getting the labels.

In [19]:
# locale.format() was deprecated and then removed in Python 3.12;
# locale.format_string() takes the same arguments for a single "%d" specifier.
# Iterating over the columns (instead of looking rows up with [i]) also works when
# a frame's index is not 0..n-1.

# For the grid: the expected damage as a dollar amount with thousands separators
labels2 = ["$" + locale.format_string("%d", damage, grouping = True)
           for damage in grid_exp_val_2018["DAMAGE_PROPERTY"]]

grid_exp_val_2018['labels'] = labels2


# For the cities: name, population and expected damage
labels3 = ["Name: " + str(name)
           + "<br>Population: " + str(population)
           + "<br>Expected Damage: $" + locale.format_string("%d", damage, grouping = True)
           for name, population, damage in zip(cities_exp_val_2018['name'],
                                               cities_exp_val_2018['pop'],
                                               cities_exp_val_2018["DAMAGE_PROPERTY"])]

cities_exp_val_2018['labels'] = labels3

Separating months so that the map can have a dropdown menu selection.

In [20]:
# JULIAN_DAY value that selects each month, January through December
# (each is the 15th of its month counted as a day of the year in a non-leap year)
MID_MONTH_JULIAN_DAYS = (15, 46, 74, 105, 135, 166, 196, 227, 258, 288, 319, 349)


def _split_by_month(frame):
    """Return twelve sub-frames of `frame`, one per entry of MID_MONTH_JULIAN_DAYS, in order."""
    return [frame[frame['JULIAN_DAY'] == day] for day in MID_MONTH_JULIAN_DAYS]


(grid_jan_data, grid_feb_data, grid_mar_data, grid_apr_data,
 grid_may_data, grid_jun_data, grid_jul_data, grid_aug_data,
 grid_sep_data, grid_oct_data, grid_nov_data, grid_dec_data) = _split_by_month(grid_exp_val_2018)

(cities_jan_data, cities_feb_data, cities_mar_data, cities_apr_data,
 cities_may_data, cities_jun_data, cities_jul_data, cities_aug_data,
 cities_sep_data, cities_oct_data, cities_nov_data, cities_dec_data) = _split_by_month(cities_exp_val_2018)

The 2018 expected values map

In [23]:
# Each trace can be thought of as a layer to the map.
# All grid layers share the colour limits of the complete grid table, so a given
# colour stands for the same value in every month.
grid_color_min = grid_exp_val_2018['log_10_dam'].min()
grid_color_max = grid_exp_val_2018['log_10_dam'].max()


def _grid_trace(month_grid):
    """Return the scattergeo layer of expected damage for one month of grid points.

    month_grid: the grid rows of a single month; its BEGIN_LON, BEGIN_LAT, labels
    and log_10_dam columns are read.
    """
    return {"lon": month_grid["BEGIN_LON"],
            "lat":  month_grid["BEGIN_LAT"],
            "name": "Expected Value",
            "text": month_grid["labels"],
            "marker": {"color": month_grid["log_10_dam"],
                       "colorscale": "Viridis",
                       "size": 2.5,
                       "symbol":"square",
                       "cmin": grid_color_min,
                       "cmax": grid_color_max,
                       # The colour is log-10 damage, so ticks 3..6 are labelled in dollars
                       "colorbar": dict(title = 'Expected Property Damage',
                                        tickmode = 'array',
                                        tickvals = [3, 4, 5, 6],
                                        ticktext = ['$1,000', '$10,000', '$100,000', '$1,000,000'])},
            "type": "scattergeo",
            "visible": True}


def _city_trace(month_cities):
    """Return the scattergeo layer of city markers for one month.

    month_cities: the city rows of a single month; its BEGIN_LON, BEGIN_LAT and
    labels columns are read.
    """
    return {"lon": month_cities["BEGIN_LON"],
            "lat": month_cities["BEGIN_LAT"],
            "name": "City",
            "text": month_cities["labels"],
            "marker": {"size": 4,
                       "symbol": "star-open",
                       "color": "beige"},
            "type": "scattergeo"}


# NOTE(review): every grid layer starts with "visible": True and the city layers do
# not set it, so until a dropdown entry is chosen all months are presumably drawn on
# top of each other -- confirm whether only January should start out visible.

# trace1-12 are monthly data points
(trace1, trace2, trace3, trace4,
 trace5, trace6, trace7, trace8,
 trace9, trace10, trace11, trace12) = [_grid_trace(month_grid)
                                       for month_grid in (grid_jan_data, grid_feb_data, grid_mar_data,
                                                          grid_apr_data, grid_may_data, grid_jun_data,
                                                          grid_jul_data, grid_aug_data, grid_sep_data,
                                                          grid_oct_data, grid_nov_data, grid_dec_data)]


# trace13-24 are the cities, drawn with identical markers
# They are repeated so that they can be redrawn for each dropdown selection
# Otherwise they become background to the prediction points
# And therefore less visible
(trace13, trace14, trace15, trace16,
 trace17, trace18, trace19, trace20,
 trace21, trace22, trace23, trace24) = [_city_trace(month_cities)
                                        for month_cities in (cities_jan_data, cities_feb_data, cities_mar_data,
                                                             cities_apr_data, cities_may_data, cities_jun_data,
                                                             cities_jul_data, cities_aug_data, cities_sep_data,
                                                             cities_oct_data, cities_nov_data, cities_dec_data)]


# Collect all the traces as the map data: the twelve monthly grid layers first,
# then the twelve city layers. A plain list is used because the graph_objs.Data
# wrapper is deprecated.
data2 = [trace1, trace2, trace3, trace4,
         trace5, trace6, trace7, trace8,
         trace9, trace10, trace11, trace12,
         trace13, trace14, trace15, trace16,
         trace17, trace18, trace19, trace20,
         trace21, trace22, trace23, trace24]


# Design the layout/background: black land on a dark grey background with white borders
# Both map axes use the same grid settings and differ only in the range shown
geo_axis_grid = {'showgrid': True,
                 'gridwidth': 0.05,
                 'dtick': 5}

layout2 = {
    'geo': {
        'scope': 'north america',
        'showland': True,
        'landcolor': "rgb(0, 0, 0)",
        'subunitcolor': "rgb(255, 255, 255)",
        'countrycolor': "rgb(255, 255, 255)",
        'showlakes': True,
        'showocean': True,
        'lakecolor': "rgb(23, 23, 23)",
        'oceancolor': "rgb(23, 23, 23)",
        'showsubunits': True,
        'showcountries': True,
        'resolution': 50,
        'lonaxis': dict(geo_axis_grid, range = [-125.0, -70.0]),
        'lataxis': dict(geo_axis_grid, range = [23.0, 50.0]),
    },
    'title': 'How much damage would a tornado cause in...',
    'showlegend': False,
}


# Set up the dropdown selections
updatemenus = [{'buttons': [{'args': ['visible', [True, False, False, False,
                                                  False, False, False, False,
                                                  False, False, False, False,
                                                  True, False, False, False,
                                                  False, False, False, False,
                                                  False, False, False, False]],
                             'label': '... January?',
                             'method': 'restyle'},
                            {'args': ['visible', [False, True, False, False,
                                                  False, False, False, False,
                                                  False, False, False, False,
                                                  False, True, False, False,
                                                  False, False, False, False,
                                                  False, False, False, False]],
                             'label': '... February?',
                             'method': 'restyle'},
                            {'args': ['visible', [False, False, True, False,
                                                  False, False, False, False,
                                                  False, False, False, False,
                                                  False, False, True, False,
                                                  False, False, False, False,
                                                  False, False, False, False]],
                             'label': "... March?",
                             'method': 'restyle'},
                            {'args': ['visible', [False, False, False, True,
                                                  False, False, False, False,
                                                  False, False, False, False,
                                                  False, False, False, True,
                                                  False, False, False, False,
                                                  False, False, False, False]],
                             'label': "... April?",
                             'method': 'restyle'},
                            {'args': ['visible', [False, False, False, False,
                                                  True, False, False, False,
                                                  False, False, False, False,
                                                  False, False, False, False,
                                                  True, False, False, False,
                                                  False, False, False, False]],
                             'label': "... May?",
                             'method': 'restyle'},
                            {'args': ['visible', [False, False, False, False,
                                                  False, True, False, False,
                                                  False, False, False, False,
                                                  False, False, False, False,
                                                  False, True, False, False,
                                                  False, False, False, False]],
                             'label': "... June?",
                             'method': 'restyle'},
                            {'args': ['visible', [False, False, False, False,
                                                  False, False, True, False,
                                                  False, False, False, False,
                                                  False, False, False, False,
                                                  False, False, True, False,
                                                  False, False, False, False]],
                             'label': "... July?",
                             'method': 'restyle'},
                            {'args': ['visible', [False, False, False, False,
                                                  False, False, False, True,
                                                  False, False, False, False,
                                                  False, False, False, False,
                                                  False, False, False, True,
                                                  False, False, False, False]],
                             'label': "... August?",
                             'method': 'restyle'},
                            {'args': ['visible', [False, False, False, False,
                                                  False, False, False, False,
                                                  True, False, False, False,
                                                  False, False, False, False,
                                                  False, False, False, False,
                                                  True, False, False, False]],
                             'label': "... September?",
                             'method': 'restyle'},
                            {'args': ['visible', [False, False, False, False,
                                                  False, False, False, False,
                                                  False, True, False, False,
                                                  False, False, False, False,
                                                  False, False, False, False,
                                                  False, True, False, False]],
                             'label': "... October?",
                             'method': 'restyle'},
                            {'args': ['visible', [False, False, False, False,
                                                  False, False, False, False,
                                                  False, False, True, False,
                                                  False, False, False, False,
                                                  False, False, False, False,
                                                  False, False, True, False]],
                             'label': "... November?",
                             'method': 'restyle'},
                            {'args': ['visible', [False, False, False, False,
                                                  False, False, False, False,
                                                  False, False, False, True,
                                                  False, False, False, False,
                                                  False, False, False, False,
                                                  False, False, False, True]],
                             'label': "... December?",
                             'method': 'restyle'}],
               'type': 'buttons'}]


# Attach the month-selector buttons defined above to the figure layout
layout2['updatemenus'] = updatemenus


# Bundle the traces and the layout into a single figure specification
fig2 = dict(data=data2, layout=layout2)


# Render the interactive figure inline in the notebook
plotly.offline.iplot(fig2)

The values visualized above were determined by two artificial neural networks trained on the past 20 years of tornado records. Variables influencing the prediction include (but are not limited to): population density, median household income, Landsat-derived land cover classes, time of year, latitude, and longitude.

One artificial neural network predicts the probability of a tornado causing damage (accuracy = 82%, AUROC = 0.87), while the other predicts how much damage the tornado would cause given that damage is certain ($R^2$ = 0.47). When multiplied, the outputs of these two neural networks generate an expected value, which is visualized above.